{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "%load_ext autoreload\n",
    "%autoreload 1\n",
    "%aimport TableRecognition\n",
    "import cv2\n",
    "from matplotlib import pyplot as plt\n",
    "import numpy as np\n",
    "import time as t\n",
    "import cv_algorithms\n",
    "import collections\n",
    "import operator\n",
    "import scipy.signal\n",
    "import scipy.spatial.distance\n",
    "import TableRecognition\n",
    "from UliEngineering.SignalProcessing.Selection import *\n",
    "print (\"OpenCV Version : %s \" % cv2.__version__)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Configuration"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Path of the scanned page image that the preprocessing cell loads via cv2.imread.\n",
    "# It is a relative path, so it is resolved against the kernel's working directory.\n",
    "# Run './download-oldweather.py Northwind 1947' to get the files\n",
    "filename = \"Northwind/Northwind-WAG-282-1947-0063-0.jpg\""
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Image preprocessing"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "%matplotlib inline\n",
    "# cv2.imread does not raise on failure; it returns None, which is checked below.\n",
    "img = cv2.imread(filename, flags=cv2.IMREAD_COLOR)\n",
    "if img is None:\n",
    "    # None is returned for any read failure, not only for a missing file,\n",
    "    # so the message must not claim that the file does not exist.\n",
    "    raise ValueError(\n",
    "        \"File {0} could not be read (missing, unreadable or not a supported image)\".format(filename))\n",
    "imgGrey = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)\n",
    "# Fixed threshold at 150: imgThresh is 255 where the grey value is <= 150 (dark pixels),\n",
    "# imgThreshInv is the complement (255 where the grey value is > 150).\n",
    "imgThresh = cv2.threshold(imgGrey, 150, 255, cv2.THRESH_BINARY_INV)[1]\n",
    "imgThreshInv = cv2.threshold(imgGrey, 150, 255, cv2.THRESH_BINARY)[1]\n",
    "\n",
    "# Dilate the dark-pixel mask with a 5x5 kernel, then erode that result with a 4x4 kernel.\n",
    "imgDil = cv2.dilate(imgThresh, np.ones((5, 5), np.uint8))\n",
    "# NOTE(review): imgEro is not referenced by any other cell of this notebook;\n",
    "# only imgDil is passed on to the ContourAnalyzer -- confirm whether it is still needed.\n",
    "imgEro = cv2.erode(imgDil, np.ones((4, 4), np.uint8))\n",
    "\n",
    "# Show the dilated mask that the contour analysis works on.\n",
    "plt.gcf().set_size_inches(20, 18)\n",
    "plt.imshow(imgDil, cmap=\"Greys_r\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Build the analyzer on the dilated mask and run its pipeline steps in order.\n",
    "# The steps are stateful methods of TableRecognition.ContourAnalyzer; their\n",
    "# order is kept exactly as is.\n",
    "contour_analyzer = TableRecognition.ContourAnalyzer(imgDil)\n",
    "contour_analyzer.filter_contours(min_area=400)\n",
    "contour_analyzer.build_graph()\n",
    "contour_analyzer.remove_non_table_nodes()\n",
    "contour_analyzer.compute_contour_bounding_boxes()\n",
    "contour_analyzer.separate_supernode()\n",
    "\n",
    "# Print the analyzer's verdict; processing continues regardless of the result.\n",
    "has_valid_table = contour_analyzer.does_page_have_valid_table()\n",
    "print(has_valid_table)\n",
    "\n",
    "contour_analyzer.find_empty_cells(imgThreshInv)\n",
    "\n",
    "# Visualize on a copy so that the loaded image itself is not handed to the analyzer.\n",
    "# NOTE(review): img comes from cv2.imread (BGR channel order) while plt.imshow\n",
    "# interprets 3-channel arrays as RGB -- confirm whether a\n",
    "# cv2.cvtColor(ix, cv2.COLOR_BGR2RGB) is wanted for display.\n",
    "ix = img.copy()\n",
    "contour_analyzer.visualize_contours(ix)\n",
    "\n",
    "fig = plt.gcf()\n",
    "fig.set_size_inches(20, 18)\n",
    "plt.imshow(ix)\n",
    "\n",
    "# Optional export of the annotated page:\n",
    "#cv2.imwrite(\"/ram/Northwind-1.png\", ix)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Find bounding box corner clusters"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Runs the corner-cluster search on the analyzer created in the contour-analysis cell.\n",
    "# The clusters are drawn by the next cell via visualize_corner_clusters; what exactly\n",
    "# is clustered is defined in TableRecognition (presumably the bounding-box corners\n",
    "# named in the heading above -- TODO confirm).\n",
    "contour_analyzer.find_corner_clusters()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "%matplotlib inline\n",
    "# Work on a copy so that the loaded image is not handed to the analyzer.\n",
    "imgCopy = img.copy()\n",
    "\n",
    "fig = plt.gcf()\n",
    "fig.set_size_inches(20, 18)\n",
    "\n",
    "# The analyzer is handed the copy; it is then displayed with a title.\n",
    "contour_analyzer.visualize_corner_clusters(imgCopy)\n",
    "plt.imshow(imgCopy)\n",
    "plt.title(\"Corner color shows number of merged nodes\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Compute cells from contours and clusters"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "%matplotlib inline\n",
    "# Computes the cell hulls on the analyzer; they are drawn by a later cell via\n",
    "# draw_all_cell_hulls. The computation itself is defined in TableRecognition.\n",
    "# NOTE(review): presumably requires find_corner_clusters() to have run first -- confirm.\n",
    "contour_analyzer.compute_cell_hulls()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Recompute cell boundaries based on nodes & compute table angle"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Draw every computed cell hull onto a fresh copy of the page and show it.\n",
    "ix = img.copy()\n",
    "\n",
    "fig = plt.gcf()\n",
    "fig.set_size_inches(20, 18)\n",
    "\n",
    "# xscale/yscale are passed through to TableRecognition; presumably they shrink the\n",
    "# drawn hulls to 80% so that neighbouring cells stay distinguishable -- TODO confirm.\n",
    "contour_analyzer.draw_all_cell_hulls(ix, xscale=0.8, yscale=0.8)\n",
    "plt.imshow(ix)\n",
    "\n",
    "# Optional export of the annotated page:\n",
    "#cv2.imwrite(\"/ram/ATR.png\", ix)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Find missing cells by masking"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Ask the analyzer for the contours of cells it has not found so far.\n",
    "contour_analyzer.find_fine_table_corners()\n",
    "missing_contours = contour_analyzer.compute_filtered_missing_cell_contours()\n",
    "\n",
    "# Outline all of them (contour index -1) on a copy of the page, 3 px wide.\n",
    "# (0, 255, 0) is green in both BGR and RGB channel order.\n",
    "ix = img.copy()\n",
    "icm = cv2.drawContours(ix, missing_contours, -1, color=(0, 255, 0), thickness=3)\n",
    "\n",
    "fig = plt.gcf()\n",
    "fig.set_size_inches(20, 18)\n",
    "# NOTE(review): ix is a 3-channel image here, for which matplotlib ignores cmap --\n",
    "# confirm and drop the argument.\n",
    "plt.imshow(ix, cmap=\"Greys_r\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Insert missing cell contours into main contours & perform second run"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Add the contours found by compute_filtered_missing_cell_contours() to the analyzer.\n",
    "# NOTE(review): this mutates the analyzer in place, so re-running this cell adds the\n",
    "# same contours again; re-run from the cell that constructs the ContourAnalyzer to reset.\n",
    "# NOTE(review): '+=' assumes both operands are of the same sequence type -- confirm.\n",
    "contour_analyzer.contours += missing_contours\n",
    "\n",
    "# Repeat the bounding-box and empty-cell steps now that the contour set has changed.\n",
    "contour_analyzer.compute_contour_bounding_boxes()\n",
    "contour_analyzer.find_empty_cells(imgThreshInv)\n",
    "\n",
    "# Repeat the corner-cluster, cell-hull and fine-corner steps, in the same order as\n",
    "# they were run in the earlier cells.\n",
    "contour_analyzer.find_corner_clusters()\n",
    "contour_analyzer.compute_cell_hulls()\n",
    "contour_analyzer.find_fine_table_corners()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": true
   },
   "source": [
    "### Find cluster centers and group into hgroups and vgroups"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Assigns table coordinates on the analyzer; a later cell addresses a cell as (14, 6).\n",
    "# The positional argument 5. is interpreted by TableRecognition -- presumably a\n",
    "# grouping tolerance in pixels for the hgroups/vgroups, TODO confirm.\n",
    "contour_analyzer.compute_table_coordinates(5.)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Draw the cell hulls via the analyzer's table-coordinate drawing routine on a fresh copy.\n",
    "ix = img.copy()\n",
    "\n",
    "# xscale/yscale are forwarded to TableRecognition unchanged (0.9 each).\n",
    "contour_analyzer.draw_table_coord_cell_hulls(ix, xscale=0.9, yscale=0.9)\n",
    "\n",
    "fig = plt.gcf()\n",
    "fig.set_size_inches(20, 18)\n",
    "plt.imshow(ix)\n",
    "\n",
    "# Optional export of the annotated page:\n",
    "#cv2.imwrite(\"/ram/ATR2.png\", ix)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": true
   },
   "source": [
    "### Extract table cell image"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "fig = plt.gcf()\n",
    "fig.set_size_inches(20, 18)\n",
    "\n",
    "# Extract the cell addressed as (14, 6) from a copy of the page, using the\n",
    "# analyzer's default scale and marking options.\n",
    "ix = contour_analyzer.extract_cell_from_image(img.copy(), (14, 6))\n",
    "\n",
    "plt.imshow(ix, cmap=\"Greys_r\")\n",
    "\n",
    "# Optional export of the extracted cell:\n",
    "#cv2.imwrite(\"/ram/cell.png\", ix)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "plt.gcf().set_size_inches(20, 18)\n",
    "\n",
    "# Extract cell (14, 6) again, this time with xscale/yscale of 1 and mark_color=None,\n",
    "# and export the result as a PNG.\n",
    "ix = img.copy()\n",
    "ix = contour_analyzer.extract_cell_from_image(ix, (14,6), xscale=1, yscale=1, mark_color=None)\n",
    "plt.imshow(ix, cmap=\"Greys_r\")\n",
    "\n",
    "# NOTE(review): \"/ram\" looks like a machine-specific RAM disk mount -- TODO make the\n",
    "# output location configurable.\n",
    "output_path = \"/ram/airwetbulb.png\"\n",
    "# cv2.imwrite reports failure through its boolean return value; check it so that a\n",
    "# failed export is reported with the path instead of showing up only as a bare False.\n",
    "if not cv2.imwrite(output_path, ix):\n",
    "    print(\"WARNING: could not write {0}\".format(output_path))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
